#########################################################################
#### Replication materials for Huff and Schub IO
#### "Segregation, Integration, and Death: Evidence from the Korean War"
#########################################################################

## Outline
  # I.  Manuscript
  		# A. Segregation
  			# Figure 2
  			# Table 1
  		# B. Integration
  			# Figure 3
  			# Table 3
  		# C. Variance comparison results
  			# Manuscript Figure 4
  			
  # II. Online Appendix
  		# A. Segregation
  			# Tables A1-A5
  			# Figure A1
  		# B. Integration
  			# Table A6
  		# C. Variance comparison results
  			# Tables A7-A11 and Appendix Section 3.3


###############################
# Set working directory, load data, and load packages

setwd()

segall<-read.csv("Segregation_HS.csv") #format for segregation period analysis
segsub<-segall[segall$seg==1,] #subset to only segregated periods as used in main analysis
intall<-read.csv("Integration_HS.csv") #format for integration period analysis
tog<-read.csv("VarianceData_HS.csv") #format for variance across periods analysis

library(stargazer)
library(sandwich)
library(lmtest)
library(gdata)
library(zoo)
library(lawstat)

###############################
###############################
# I. Manuscript Results
###############################
###############################


##############################################################
# A. Segregation period results

##########
## Manuscript Figure 2: Fatality rates by race by period
##########

# Calculate average Black fatality rate per period
counts<-sort(unique(segsub$count))
mats<-matrix(NA,nrow=length(counts),ncol=4)
for(i in 1:length(counts)){
	#i<-2
	grab<-segsub[segsub$count==counts[i] & segsub$blackbn==1,]
	mats[i,1]<-counts[i]
	mats[i,2]<-nrow(grab)
	mats[i,3]<-sum(na.omit(grab$spec_fatal))
	mats[i,4]<-mean(na.omit(grab$spec_fatal_rate))
}

# Calculate average white fatality rate per period
matsw<-matrix(NA,nrow=length(counts),ncol=4)
for(i in 1:length(counts)){
	#i<-2
	grab<-segsub[segsub$count==counts[i] & segsub$blackbn==0,]
	matsw[i,1]<-counts[i]
	matsw[i,2]<-nrow(grab)
	matsw[i,3]<-sum(na.omit(grab$spec_fatal))
	matsw[i,4]<-mean(na.omit(grab$spec_fatal_rate))
}

periods<-sort(unique(segsub$period))

par(mar=c(2,4,1,2))
par(oma=c(2,1,0,0))
plot(segsub$count[segsub$blackbn==0]+0.2,segsub$spec_fatal_rate[segsub$blackbn==0],pch=5,col="grey80", ylim=c(0,0.32),cex=0.7,yaxt="n",xaxt="n",xlab="",ylab="")
points(segsub$count[segsub$blackbn==1],segsub$spec_fatal_rate[segsub$blackbn==1],cex=0.7)
points(counts+0.3,matsw[,4],type="l",pch=18,col="grey40",cex=1.2,lwd=4)
points(counts,mats[,4],type="l",pch=19,cex=1.2,lwd=4)
points(23,0.250,pch=5,col="grey60",cex=1.2)
points(23,0.220,pch=1,col="black",cex=1.3)
text(28,0.250,"white battalions")
text(28,0.220,"black battalions")
axis(2,at=seq(0,0.40,by=0.1),labels=c("0","10","20","30","40"),cex.axis=1.3)
axis(1,at=c(seq(1,32,by=4),32), labels=periods[c(1,5,9,13,17,21,25,29,32)],cex.axis=1.2)
mtext("Period (Year-Month-Half)",1,line=2.5,outer=F,cex=1.4)
mtext("Fatality (%)",2,line=3,outer=F,cex=1.6)


##########
## Manuscript Table 1: Main results
##########
m1a<-lm(spec_fatal_rate100 ~ blackbn, data=segsub)
m1b<-lm(spec_fatal_rate100 ~ blackbn + relevel(as.factor(count),ref=20), segsub)

stargazer(m1a,m1b,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2,covariate.labels=c("Black Bn"))


##############################################################
# B. Integration period results


##########
## Manuscript Figure 3: Fatality rates by race by period
##########
counts<-sort(unique(intall$count))
 # average Black fatality rate per period
mats<-matrix(NA,nrow=length(counts),ncol=4)
for(i in 1:length(counts)){
	#i<-2
	grab<-intall[intall$count==counts[i] & intall$blackbn==1,]
	mats[i,1]<-counts[i]
	mats[i,2]<-nrow(grab)
	mats[i,3]<-sum(na.omit(grab$specpct_imputed))
	mats[i,4]<-mean(na.omit(grab$specpct_imputed))
}
 # average white fatality rate per period
matsw<-matrix(NA,nrow=length(counts),ncol=4)
for(i in 1:length(counts)){
	#i<-2
	grab<-intall[intall$count==counts[i] & intall$blackbn==0,]
	matsw[i,1]<-counts[i]
	matsw[i,2]<-nrow(grab)
	matsw[i,3]<-sum(na.omit(grab$specpct_imputed))
	matsw[i,4]<-mean(na.omit(grab$specpct_imputed))
}

periods2<-sort(unique(intall$period))
report<-c(seq(1,42,by=3),42)

par(mar=c(2,4,1,2))
par(oma=c(2,1,0,0))
plot(intall$count[intall$blackbn==0]+0.2,intall$specpct_imputed[intall$blackbn==0],pch=5,col="grey80", ylim=c(0,0.12),cex=0.7,xlab="",ylab="",yaxt="n",xaxt="n")
points(intall$count[intall$blackbn==1]+0.2,intall$specpct_imputed[intall$blackbn==1],cex=0.7)
points(counts+0.2,matsw[,4],type="l",pch=18,col="grey40",cex=1.2,lwd=4)
points(counts,mats[,4],type="l",pch=19,cex=1.2,lwd=4)
points(64,0.115,pch=5,col="grey60",cex=1.2)
points(64,0.105,pch=1,col="black",cex=1.3)
text(67,0.115,"white")
text(67,0.105,"black")
axis(2,at=seq(0,0.12,by=0.03),labels=c(0,3,6,9,12),cex.axis=1.3)
axis(1,at=(report+32), labels=periods2[c(report)],cex.axis=1.2)
mtext("Period (Year-Month-Half)",1,line=2.5,outer=F,cex=1.4)
mtext("Fatality (%)",2,line=3,outer=F,cex=1.6)


##########
## Manuscript Table 3: Main results
##########
m1int1<-lm(specpct_imputed100 ~ blackbn, intall)
m1int2<-lm(specpct_imputed100 ~ blackbn + as.factor(count), intall)
m1int3<-lm(specpct_imputed100 ~ blackbn + as.factor(count) + as.factor(obs), intall)
m2int1<-lm(specpct_real100 ~ blackbn, intall[intall$black>=50,])
m2int2<-lm(specpct_real100 ~ blackbn + as.factor(count), intall[intall$black>=50,])
m2int3<-lm(specpct_real100 ~ blackbn + as.factor(count) + as.factor(obs), intall[intall$black>=50,])

stargazer(m1int1, m1int2, m1int3, m2int1, m2int2, m2int3,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)



##############################################################
# C.  Variance comparison results

##########
## Manuscript Figure 4: Racial fatality rate differential by period
##########

 # aggregate up to the period level
counts<-sort(unique(tog$count))
mt<-matrix(NA,nrow=length(counts),ncol=5)
for(i in 1:length(counts)){
	#i<-2
	grab<-tog[tog$count==counts[i],]
	mt[i,1]<-counts[i]
	mt[i,2]<-nrow(grab[grab$blackbn==1 & !is.na(grab$seg),])
	mt[i,3]<-mean(na.omit(grab$specpct_imputed100[grab$blackbn==1]))
	mt[i,4]<-nrow(grab[grab$blackbn==0 & !is.na(grab$seg),])
	mt[i,5]<-mean(na.omit(grab$specpct_imputed100[grab$blackbn==0]))
}
mt<-as.data.frame(mt)
colnames(mt)<-c("count","blackunits","blackmean","whiteunits","whitemean")

mt$absdiff<-abs(mt$blackmean-mt$whitemean)
mt$diffbw<-mt$blackmean-mt$whitemean
mt$seg<-ifelse(mt$count<=32,1,0)

tick<-c(seq(1,75,by=4))
lab<-sort(unique(tog$period))[tick]

par(mar=c(2,5,1,2))
par(oma=c(2,1,0,0))
plot(mt$count[mt$count<=32],mt$diffbw[mt$count<=32],pch=19,xlim=c(0.5,74.5),xaxt="n",ylim=c(-6,6),yaxt="n",xlab="",ylab="")
points(mt$count[mt$count>32],mt$diffbw[mt$count>32],pch=1)
abline(h=0,lty=2)
axis(1, at=tick, labels=lab)
mtext("Period (Year-Month-Half)",1,line=2.5,outer=F,cex=1.4)
axis(2,at=seq(-6,6,by=2),cex.axis=1.3)
mtext("Fatality Rate Gap",2,line=4,outer=F,cex=1.6)
mtext("(Black % - White %)",2,line=2.5,outer=F,cex=1.6)
points(58,5,pch=19,cex=1.5)
points(58,4,pch=1,cex=1.5)
text(67.7,5,"segregated")
text(67,4,"integrated")









###############################
###############################
# II. Online Appendix Results
###############################
###############################

##############################################################
# A. Segregation period results

##########
## Online Appendix Table A1: Clustered SEs
##########
cl <- function(dat,fm, cluster){
require(sandwich, quietly = TRUE)
require(lmtest, quietly = TRUE)
M <- length(unique(cluster))
N <- length(cluster)
K <- fm$rank
dfc <- (M/(M-1))*((N-1)/(N-K))
uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
coeftest(fm, vcovCL) }

cl1a<-cl(segsub,m1a, segsub$cluster_id)
cl1b<-cl(segsub,m1b, segsub$cluster_id)

stargazer(m1a,m1b,se=list(cl1a[,2],cl1b[,2]),align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2,covariate.labels=c("Black Bn"))


##########
## Online Appendix Table A2: Varying battalion size by race
##########
segsub$toeblack20<-NA #black units on average have 20 MORE people
segsub$toeblack20[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]+10
segsub$toeblack20[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]-10
segsub$toeblack40<-NA 
segsub$toeblack40[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]+20
segsub$toeblack40[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]-20
segsub$toeblack60<-NA 
segsub$toeblack60[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]+30
segsub$toeblack60[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]-30
segsub$toeblack80<-NA 
segsub$toeblack80[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]+40
segsub$toeblack80[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]-40
segsub$toeblack100<-NA 
segsub$toeblack100[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]+50
segsub$toeblack100[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]-50

segsub$toewhite20<-NA #black units on average have 20 FEWER people
segsub$toewhite20[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]-10
segsub$toewhite20[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]+10
segsub$toewhite40<-NA 
segsub$toewhite40[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]-20
segsub$toewhite40[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]+20
segsub$toewhite60<-NA 
segsub$toewhite60[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]-30
segsub$toewhite60[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]+30
segsub$toewhite80<-NA 
segsub$toewhite80[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]-40
segsub$toewhite80[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]+40
segsub$toewhite100<-NA 
segsub$toewhite100[segsub$blackbn==1]<-segsub$toe[segsub$blackbn==1]-50
segsub$toewhite100[segsub$blackbn==0]<-segsub$toe[segsub$blackbn==0]+50

# Recalculate fatality rate given new denominators
segsub$spec_fatal_rate100_black20<-segsub$spec_fatal/segsub$toeblack20*100
segsub$spec_fatal_rate100_black40<-segsub$spec_fatal/segsub$toeblack40*100
segsub$spec_fatal_rate100_black60<-segsub$spec_fatal/segsub$toeblack60*100
segsub$spec_fatal_rate100_black80<-segsub$spec_fatal/segsub$toeblack80*100
segsub$spec_fatal_rate100_black100<-segsub$spec_fatal/segsub$toeblack100*100
segsub$spec_fatal_rate100_white20<-segsub$spec_fatal/segsub$toewhite20*100
segsub$spec_fatal_rate100_white40<-segsub$spec_fatal/segsub$toewhite40*100
segsub$spec_fatal_rate100_white60<-segsub$spec_fatal/segsub$toewhite60*100
segsub$spec_fatal_rate100_white80<-segsub$spec_fatal/segsub$toewhite80*100
segsub$spec_fatal_rate100_white100<-segsub$spec_fatal/segsub$toewhite100*100

m1ab100<-lm(spec_fatal_rate100_black100 ~ blackbn, data=segsub)
m1ab80<-lm(spec_fatal_rate100_black80 ~ blackbn, data=segsub)
m1ab60<-lm(spec_fatal_rate100_black60 ~ blackbn, data=segsub)
m1ab40<-lm(spec_fatal_rate100_black40 ~ blackbn, data=segsub)
m1ab20<-lm(spec_fatal_rate100_black20 ~ blackbn, data=segsub)
m1a<-lm(spec_fatal_rate100 ~ blackbn, data=segsub)
m1aw20<-lm(spec_fatal_rate100_white20 ~ blackbn, data=segsub)
m1aw40<-lm(spec_fatal_rate100_white40 ~ blackbn, data=segsub)
m1aw60<-lm(spec_fatal_rate100_white60 ~ blackbn, data=segsub)
m1aw80<-lm(spec_fatal_rate100_white80 ~ blackbn, data=segsub)
m1aw100<-lm(spec_fatal_rate100_white100 ~ blackbn, data=segsub)

stargazer(m1ab100,m1ab80,m1ab60,m1ab40,m1ab20,m1a,m1aw20,m1aw40,m1aw60,m1aw80,m1aw100,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2,covariate.labels=c("Black Bn"))

##########
## Online Appendix Table A3: Varying integration start date
##########
 # go +/- 4:as early as 1st half September '50 and as late 1st half January '51
segall$segneg4<-ifelse(segall$count<=28,1,0)
segall$segneg3<-ifelse(segall$count<=29,1,0)
segall$segneg2<-ifelse(segall$count<=30,1,0)
segall$segneg1<-ifelse(segall$count<=31,1,0)

segall$segpos4<-ifelse(segall$count<=36,1,0)
segall$segpos3<-ifelse(segall$count<=35,1,0)
segall$segpos2<-ifelse(segall$count<=34,1,0)
segall$segpos1<-ifelse(segall$count<=33,1,0)

m1asegneg4<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segneg4==1,])
m1asegneg3<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segneg3==1,])
m1asegneg2<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segneg2==1,])
m1asegneg1<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segneg1==1,])
m1a<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$seg==1,])
m1asegpos1<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segpos1==1,])
m1asegpos2<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segpos2==1,])
m1asegpos3<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segpos3==1,])
m1asegpos4<-lm(spec_fatal_rate100 ~ blackbn, data=segall[segall$segpos4==1,])

stargazer(m1asegneg4,m1asegneg3,m1asegneg2,m1asegneg1,m1a,m1asegpos1,m1asegpos2,m1asegpos3,m1asegpos4,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2,covariate.labels=c("Black Bn"))

##########
## Online Appendix Table A4: Includes all fatalities within a unit regardless of individual's race
##########
m1atotal<-lm(total_fatal_rate100 ~ blackbn, data=segsub)
m1btotal<-lm(total_fatal_rate100 ~ blackbn + relevel(as.factor(count),ref=20), segsub)

stargazer(m1atotal,m1btotal,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2,covariate.labels=c("Black Bn"))

##########
## Online Appendix Table A5: Accounting for defense, offense, and stalemate
##########
offense = c(6:9,14:19)  #Incheon to full Chinese entry; following Chinese offensive stalling out
defense = c(1:5,10:13,20:24) #Opening of war; full Chinese entry; Enemy spring offensive 
stalemate = c(25:32) #from first talks in July 1951

segsub$warfare<-NA
segsub$warfare[segsub$count %in% offense]<-"offense"
segsub$warfare[segsub$count %in% defense]<-"defense"
segsub$warfare[segsub$count %in% stalemate]<-"stalemate"
segsub$warfare<-as.factor(as.character(segsub$warfare))
segsub$warfare=relevel(segsub$warfare, ref="stalemate")

off<-lm(spec_fatal_rate100 ~ blackbn, data=segsub[segsub$warfare=="offense",])
def<-lm(spec_fatal_rate100 ~ blackbn, data=segsub[segsub$warfare=="defense",])
sta<-lm(spec_fatal_rate100 ~ blackbn, data=segsub[segsub$warfare=="stalemate",])
intal<-lm(spec_fatal_rate100 ~ blackbn + as.factor(warfare) + blackbn:as.factor(warfare), data=segsub)

stargazer(def,off,sta,intal,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2,covariate.labels=c("Black Bn"))


##########
## Online Appendix Figure A1: Addressing CLT limitations
##########
 # Real test statistic [done without period FEs]
reg<-lm(spec_fatal_rate100 ~ blackbn, data=segsub)
realts<-summary(reg)$coefficients[2,4] #p-value

 # How many obs of each condition
tot<-nrow(segsub) 
btot<-nrow(segsub[segsub$blackbn==1,]) 
wtot<-nrow(segsub[segsub$blackbn==0,])

###############################
# SLOW TO RUN [can load object below with 100k iterations if preferable]
# set.seed(02138)
# n<-1000000 #iterations
# matt<-matrix(NA,nrow=n,ncol=4)
# for(i in 1:n){
# blackrows<-sample(nrow(segsub),btot)	
# blackdf<-segsub[c(blackrows),]
# whitedf<-segsub[-c(blackrows),]
# blackdf$blackbnri<-1
# whitedf$blackbnri<-0
# ridf<-as.data.frame(rbind(blackdf,whitedf))
# regri<-lm(spec_fatal_rate100 ~ blackbnri, data=ridf)
# rits<-summary(regri)$coefficients[2,4]
# matt[i,1]<-i
# matt[i,2]<-mean(blackdf$spec_fatal_rate100)
# matt[i,3]<-mean(whitedf$spec_fatal_rate100)
# matt[i,4]<-rits
# if(i %% 5000==0){print(i)}
# }
# matt<-as.data.frame(matt)
# colnames(matt)<-c("run","black_fatalrate","white_fatalrate","ts")
# head(matt)
# matt$diff<-abs(matt$black_fatalrate-matt$white_fatalrate)
###############################

# Load the saved object here if skipping the loop
matt<-readRDS("Segregation_RandomizationInference_1mm.rds")

 # Do p-values arise as they should 
nrow(matt[matt$ts<=0.05,])/nrow(matt)
## get a p-value of less than 0.05 in 4.5% of simulations 

 # Simulations with p-values less than real one
nrow(matt[matt$ts<realts,])/nrow(matt)  
## p-value we find in reality is higher than those in 81.5% of simulations

par(mar=c(2,4,1,2))
par(oma=c(2,1,2,0))
hist(matt$ts,breaks=20,xlab="p-values",main="")
abline(v=realts,lwd=2,col="firebrick")
hist(matt$ts[matt$ts<=0.05],add=T,col="dodgerblue3",breaks=1)
mtext("Simulated p-values",3, line=1,cex=1.5)
mtext("p-value",1, line=2,cex=1.2)






##############################################################
# B. Integration period results


##########
## Online Appendix Table A6: Aggregated to the raical-period (rather than racial-battalion-period) level
##########

 # Observed distribution of integration rates across battalions when known
realint<-intall[intall$black>=50 & !is.na(intall$specpct_real100),]
realint$realint_pct<-realint$black/realint$eff
summary(realint$realint_pct) # see min, max, and quartiles

 # Aggregated to period-level for analysis
   #white soldiers
cts<-sort(unique(intall$count))
mawhite<-matrix(NA,nrow=length(cts),ncol=6)
for(i in 1:length(cts)){
	grab<-intall[intall$count==cts[i] & intall$blackbn==0,]
	number<-nrow(grab)
	whitefatal<-sum(grab$white_fatal)
	white_imputed<-grab$toe[1]-grab$black_imputed[1]
	white_imputedtotal<-white_imputed*number
	blackbn<-0
	totalpct100<-whitefatal/white_imputedtotal*100
	mawhite[i,1]<-cts[i]
	mawhite[i,2]<-blackbn
	mawhite[i,3]<-number
	mawhite[i,4]<-white_imputed
	mawhite[i,5]<-white_imputedtotal
	mawhite[i,6]<-totalpct100
}
mawhitedf<-as.data.frame(mawhite)
colnames(mawhitedf)<-c("count","blackbn","period_obs","imputed","imputed_total","totalpct100")

  #Black soldiers
cts<-sort(unique(intall$count))
mablack<-matrix(NA,nrow=length(cts),ncol=6)
for(i in 1:length(cts)){
	#i<-1
	grab<-intall[intall$count==cts[i] & intall$blackbn==1,]
	number<-nrow(grab)
	blackfatal<-sum(grab$black_fatal)
	black_imputed<-grab$black_imputed[1]
	black_imputedtotal<-black_imputed*number
	blackbn<-1
	totalpct100<-blackfatal/black_imputedtotal*100
	mablack[i,1]<-cts[i]
	mablack[i,2]<-blackbn
	mablack[i,3]<-number
	mablack[i,4]<-black_imputed
	mablack[i,5]<-black_imputedtotal
	mablack[i,6]<-totalpct100
}
mablackdf<-as.data.frame(mablack)
colnames(mablackdf)<-c("count","blackbn","period_obs","imputed","imputed_total","totalpct100")

df<-rbind(mawhitedf,mablackdf)

aggint<-lm(totalpct100 ~ blackbn, df)
aggintfe<-lm(totalpct100 ~ blackbn + as.factor(count), df)
stargazer(aggint, aggintfe,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)





##############################################################
# C.  Variance comparison results

##########
## Online Appendix Table A7: Absolute fatality rate gap (OLS)
##########

 # generate rescaled fatality rates to match between segregation and integration
segbench<-mean(c(mt$whitemean[mt$seg==1],mt$blackmean[mt$seg==1]))
integbench<-mean(c(mt$whitemean[mt$seg==0],mt$blackmean[mt$seg==0]))
inflator<-segbench/integbench
mt$whitemeaninfl<-NA
mt$blackmeaninfl<-NA
mt$absdiffinfl<-NA
mt$diffbwinfl<-NA
mt$whitemeaninfl[mt$seg==1]<-mt$whitemean[mt$seg==1]
mt$blackmeaninfl[mt$seg==1]<-mt$blackmean[mt$seg==1]
mt$absdiffinfl[mt$seg==1]<-abs(mt$blackmeaninfl[mt$seg==1]-mt$whitemeaninfl[mt$seg==1])
mt$diffbwinfl[mt$seg==1]<-mt$blackmeaninfl[mt$seg==1]-mt$whitemeaninfl[mt$seg==1]
mt$whitemeaninfl[mt$seg==0]<-mt$whitemean[mt$seg==0]*inflator
mt$blackmeaninfl[mt$seg==0]<-mt$blackmean[mt$seg==0]*inflator
mt$absdiffinfl[mt$seg==0]<-abs(mt$blackmeaninfl[mt$seg==0]-mt$whitemeaninfl[mt$seg==0])
mt$diffbwinfl[mt$seg==0]<-mt$blackmeaninfl[mt$seg==0]-mt$whitemeaninfl[mt$seg==0]

var1<-lm(absdiff ~ seg, mt)
var2<-lm(absdiffinfl ~ seg, mt)
stargazer(var1,var2,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=2)


##########
## Online Appendix Table A8: Levene's test
##########
 # observed fatality data
vars<-c("seg","diffbw")
lev<-mt[vars]
levene.test(lev$diffbw,lev[,"seg"], location="mean")
levene.test(lev$diffbw,lev[,"seg"], location="median")

 # inflated fatality data
vars<-c("seg","diffbwinfl")
lev<-mt[vars]
levene.test(lev$diffbwinfl,lev[,"seg"], location="mean")
levene.test(lev$diffbwinfl,lev[,"seg"], location="median")


##########
## Online Appendix Table A9: Randomization Inference
##########

 # Observed fatality data
  # real test statistic
realts<-var(mt$diffbw[mt$seg==1])-var(mt$diffbw[mt$seg==0]) 
  # get proportion treated
treatpct<-length(mt$seg[mt$seg==1])/length(mt$seg)  

set.seed(02138)
n<-10000 #iterations
matt<-matrix(NA,nrow=n,ncol=4)
for(i in 1:n){
rand<-rbinom(length(mt$seg),1,treatpct)
newdf<-as.data.frame(cbind(mt$diffbw,rand))
segvar<-var(newdf$V1[newdf$rand==1])
intvar<-var(newdf$V1[newdf$rand==0])
out<-segvar-intvar
matt[i,1]<-i
matt[i,2]<-segvar
matt[i,3]<-intvar
matt[i,4]<-out
}
matt<-as.data.frame(matt)
colnames(matt)<-c("run","segvar","intvar","ts")

1-nrow(matt[abs(matt$ts)<abs(realts),])/nrow(matt) #2.04%


 # Inflated fatality data
  # real test statistic
realts<-var(mt$diffbwinfl[mt$seg==1])-var(mt$diffbwinfl[mt$seg==0]) 
  # get proportion treated
treatpct<-length(mt$seg[mt$seg==1])/length(mt$seg)  

set.seed(02138)
n<-10000 #iterations
matt<-matrix(NA,nrow=n,ncol=4)
for(i in 1:n){
rand<-rbinom(length(mt$seg),1,treatpct)
newdf<-as.data.frame(cbind(mt$diffbwinfl,rand))
segvar<-var(newdf$V1[newdf$rand==1])
intvar<-var(newdf$V1[newdf$rand==0])
out<-segvar-intvar
matt[i,1]<-i
matt[i,2]<-segvar
matt[i,3]<-intvar
matt[i,4]<-out
}
matt<-as.data.frame(matt)
colnames(matt)<-c("run","segvar","intvar","ts")

1-nrow(matt[abs(matt$ts)<abs(realts),])/nrow(matt) #3.3%


##########
## Online Appendix Section 3.3 Analysis: Variance when holding fixed stalemate
##########
 
 # Subset to stalemate using two potential start points for stalemate
mtstale<-mt[mt$count>=21,] #starts May 1951
mtstale2<-mt[mt$count>=25,] #starts July 1951

 # OLS on absolute fatality rate gap
summary(lm(absdiff ~ seg, mtstale))
summary(lm(absdiff ~ seg, mtstale2))

 # Levene's test
vars<-c("seg","diffbw")
levstale<-mtstale[vars]
levene.test(levstale$diffbw, levstale[,"seg"], location="mean")
levene.test(levstale$diffbw, levstale[,"seg"], location="median")

levstale2<-mtstale2[vars]
levene.test(levstale2$diffbw, levstale2[,"seg"], location="mean")
levene.test(levstale2$diffbw, levstale2[,"seg"], location="median")

 # Randomization inference
 
  # real test statistic (May 1951 start to stalemate)
realts<-var(mtstale$diffbw[mtstale$seg==1])-var(mtstale$diffbw[mtstale$seg==0]) 
  # number seg
treatpct<-length(mtstale$seg[mtstale$seg==1])/length(mtstale$seg) 
set.seed(02138)
n<-10000 #iterations
matt<-matrix(NA,nrow=n,ncol=4)
for(i in 1:n){
rand<-rbinom(length(mtstale$seg),1,treatpct)
newdf<-as.data.frame(cbind(mtstale$diffbw,rand))
segvar<-var(newdf$V1[newdf$rand==1])
intvar<-var(newdf$V1[newdf$rand==0])
out<-segvar-intvar
matt[i,1]<-i
matt[i,2]<-segvar
matt[i,3]<-intvar
matt[i,4]<-out
}
matt<-as.data.frame(matt)
colnames(matt)<-c("run","segvar","intvar","ts")
1-nrow(matt[abs(matt$ts)<abs(realts),])/nrow(matt) #0.6%

  # real test statistic (July 1951 start to stalemate)
realts<-var(mtstale2$diffbw[mtstale2$seg==1])-var(mtstale2$diffbw[mtstale2$seg==0]) 
  # proportion treated
treatpct<-length(mtstale2$seg[mtstale2$seg==1])/length(mtstale2$seg) 

set.seed(02138)
n<-10000 #iterations
matt<-matrix(NA,nrow=n,ncol=4)
for(i in 1:n){
rand<-rbinom(length(mtstale2$seg),1,treatpct)
newdf<-as.data.frame(cbind(mtstale2$diffbw,rand))
segvar<-var(newdf$V1[newdf$rand==1])
intvar<-var(newdf$V1[newdf$rand==0])
out<-segvar-intvar
matt[i,1]<-i
matt[i,2]<-segvar
matt[i,3]<-intvar
matt[i,4]<-out
}
matt<-as.data.frame(matt)
colnames(matt)<-c("run","segvar","intvar","ts")
1-nrow(matt[abs(matt$ts)<abs(realts),])/nrow(matt) #9.4%


##########
## Online Appendix Table A10: Within-unit variance under segregation vs. integration
##########

  	# get the fatality rate gap within each bn period both raw and standardized by number of bn period deaths
		# for segregated period, randomly subset each bn into a "white" and "black" component
		# for integrated period, use the actual black vs. white components
	# holding fixed the bn, was variance for the gaps larger for the vector of gaps under seg or integ
	

 # subset of observations from segregated period
subset<-segsub

 # split segregated battalions in a small and large component
subset$smalltoe<-125.9
subset$largetoe<-subset$toe-subset$smalltoe
set.seed(0892)
for(i in 1:nrow(subset)){
	vec<-c(rep(1,subset$spec_fatal[i]),rep(0,(subset$toe[i]-subset$spec_fatal[i])))	
	vecsm<-sample(vec,round(subset$smalltoe),replace=F)
	subset$smallfatal[i]<-sum(vecsm)	
	subset$largefatal[i]<-subset$spec_fatal[i]-sum(vecsm)
}
head(subset)
subset$smallfatal100<-subset$smallfatal/subset$smalltoe*100
subset$largefatal100<-subset$largefatal/subset$largetoe*100
subset$bwdiff<-subset$largefatal100-subset$smallfatal100
subset$bwdiffstd<-ifelse(subset$spec_fatal>0,subset$bwdiff/subset$spec_fatal,0) # forces bn-periods with no fatalities to 0
subset$bwdiffstdNA<-subset$bwdiff/subset$spec_fatal # forces bn-periods with no fatalities NA

 # get within-unit variance for segregated period 
bns<-sort(unique(subset$obs))
segobs<-matrix(NA,ncol=6,nrow=length(bns))
for(i in 1:length(bns)){
	grab<-subset[subset$obs==bns[i],]
	vecdiff<-grab$bwdiff
	vecdiffstd<-grab$bwdiffstd
	vecdiffstdNA<-grab$bwdiffstdNA
	segobs[i,1]<-bns[i]
	segobs[i,2]<-length(vecdiff)
	segobs[i,3]<-var(vecdiff)
	segobs[i,4]<-var(vecdiffstd)
	segobs[i,5]<-var(vecdiffstdNA,na.rm=T)	
	segobs[i,6]<-length(na.omit(vecdiffstdNA))
}
head(segobs)
segobs<-as.data.frame(segobs)
colnames(segobs)<-c("obs","n","vargap","vargapstd","vargapstdNA","lengthgapstdNA")
segobs$seg<-1

 # prep integration period data
intvar<-intall[intall$blackbn==1,] #get rid of duplicates at battalion level 
intvar$blackfatal100<-intvar$black_fatal/intvar$black_imputed*100
intvar$whitefatal100<-intvar$white_fatal/(intvar$toe-intvar$black_imputed)*100
intvar$bwdiff<-intvar$blackfatal100-intvar$whitefatal100
intvar$bwdiffstd<-ifelse(intvar$total_fatal>0,intvar$bwdiff/intvar$total_fatal,0) # forces bn-periods with no fatalities to 0
intvar$bwdiffstdNA<-intvar$bwdiff/intvar$total_fatal # forces bn-periods with no fatalities NA

 # get within-unit variance for integrated period
bns<-sort(unique(intvar$obs))
integobs<-matrix(NA,ncol=6,nrow=length(bns))
for(i in 1:length(bns)){
	grab<-intvar[intvar$obs==bns[i],]
	vecdiff<-grab$bwdiff
	vecdiffstd<-grab$bwdiffstd
	vecdiffstdNA<-grab$bwdiffstdNA
	integobs[i,1]<-bns[i]
	integobs[i,2]<-length(vecdiff)
	integobs[i,3]<-var(vecdiff)
	integobs[i,4]<-var(vecdiffstd)	
	integobs[i,5]<-var(vecdiffstdNA,na.rm=T)
	integobs[i,6]<-length(na.omit(vecdiffstdNA))	
}
head(integobs)
integobs<-as.data.frame(integobs)
colnames(integobs)<-c("obs","n","vargap","vargapstd","vargapstdNA","lengthgapstdNA")
integobs$seg<-0


 # analysis of segregation vs. integration period within-unit variance
allobs<-rbind(segobs,integobs)
raw<-lm(vargap ~ seg, allobs[allobs$n>10,])
adj0<-lm(vargapstd ~ seg, allobs[allobs$n>10,]) 
adjNA<-lm(vargapstdNA ~ seg, allobs[allobs$lengthgapstdNA>10,]) 

stargazer(raw,adj0,adjNA,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=4)


##########
## Online Appendix Table A11: Between-unit variance under segregation vs. integration
##########

	# get the fatality rate gap between black and white bns in each period both raw and standardized by number of period deaths (avg fatality rate black units - avg fatality rate white units)
		# for segregated period, use the actual black vs. white components
		# for integrated period, randomly assign each bn as white or black
	# calculate the absolute fatality rate gap (and standardized gap) for each period 
	# analyze whether the gaps were larger under seg or integ


# Segregated Units
ct<-sort(unique(subset$count))
segbtw<-matrix(NA,ncol=9,nrow=length(ct))
for(i in 1:length(ct)){
	grab<-subset[subset$count==i,]
	segbtw[i,1]<-ct[i]
	segbtw[i,2]<-nrow(grab)
	segbtw[i,3]<-sum(grab$spec_fatal,na.rm=T)
	segbtw[i,4]<-mean(grab$spec_fatal_rate100[grab$blackbn==1])
	segbtw[i,5]<-length(grab$spec_fatal_rate100[grab$blackbn==1])
	segbtw[i,6]<-mean(grab$spec_fatal_rate100[grab$blackbn==0])
	segbtw[i,7]<-length(grab$spec_fatal_rate100[grab$blackbn==0])
	segbtw[i,8]<-segbtw[i,4]-segbtw[i,6]
	segbtw[i,9]<-segbtw[i,8]/segbtw[i,3]
}
segbtw<-as.data.frame(segbtw)
colnames(segbtw)<-c("count","bns","fatalities","black_fatal100","black_bns","white_fatal100","white_bns","bwdiff","bwdiffstd")


# Integrated Units
intvar$fatalrate100<-(intvar$total_fatal/intvar$toe)*100

  #assign units as black or white with same proportions as under segregation
prbl<-nrow(subset[subset$blackbn==1,])/nrow(subset)  

set.seed(0892)
ct<-sort(unique(intvar$count))
holder<-intvar[1,]
holder$blackbn<-NA
for(i in 1:length(ct)){
	get<-intvar[intvar$count==ct[i],]
	ln<-nrow(get)
	blbn<-round(ln*prbl)
	vec<-c(rep(1,blbn),rep(0,ln-blbn))
	vecsm<-sample(vec,ln,replace=F)
	get$blackbn<-vecsm
	holder<-rbind(holder,get)
}
head(holder)
holder<-holder[-1,]

integbtw<-matrix(NA,ncol=9,nrow=length(ct))
for(i in 1:length(ct)){
	#i<-1
	grab<-holder[holder$count==ct[i],]
	integbtw[i,1]<-ct[i]
	integbtw[i,2]<-nrow(grab)
	integbtw[i,3]<-sum(grab$total_fatal,na.rm=T)
	integbtw[i,4]<-mean(grab$fatalrate100[grab$blackbn==1])
	integbtw[i,5]<-length(grab$fatalrate100[grab$blackbn==1])
	integbtw[i,6]<-mean(grab$fatalrate100[grab$blackbn==0])
	integbtw[i,7]<-length(grab$fatalrate100[grab$blackbn==0])
	integbtw[i,8]<-integbtw[i,4]-integbtw[i,6]
	integbtw[i,9]<-integbtw[i,8]/integbtw[i,3]
}
integbtw<-as.data.frame(integbtw)
colnames(integbtw)<-c("count","bns","fatalities","black_fatal100","black_bns","white_fatal100","white_bns","bwdiff","bwdiffstd")

# Results Together
allbtw<-rbind(segbtw,integbtw)
allbtw$seg<-ifelse(allbtw$count<33,1,0)

allbtw$absbwdiff<-abs(allbtw$bwdiff)
allbtw$absbwdiffstd<-abs(allbtw$bwdiffstd)

rawbtw<-lm(absbwdiff ~ seg, allbtw) 
adjbtw<-lm(absbwdiffstd ~ seg, allbtw) 

stargazer(rawbtw,adjbtw,align=F,style="apsr",omit.stat=c("f","ser","adj.rsq","rsq"),digits=4)



